In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
In [2]:
class ColumnsData:
    """Column labels used throughout the notebook, kept in one place.

    Cells refer to these attributes instead of repeating the raw strings.
    """

    # Keys that identify a row: observation date and location.
    date, province, island = 'Date', 'Province', 'Island'

    # Cumulative counters.
    cases, deaths, recovered = 'Total Cases', 'Total Deaths', 'Total Recovered'
    actives_cases = 'Total Active Cases'

    # Attributes of a province.
    population, area, density = 'Population', 'Area (km2)', 'Population Density'

    # Not a CSV column: the notebook computes it as deaths / cases.
    mortality = 'Mortality'
In [3]:
def create_bin(df, columns, q=5):
    """Replace each listed column of ``df`` with its quantile-bin code, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the listed columns are overwritten.
    columns : iterable
        Labels of the columns to discretise.
    q : int, default 5
        Passed to ``pandas.qcut`` as the quantile specification.

    Returns
    -------
    None
    """
    for name in columns:
        # duplicates='drop' collapses repeated bin edges, so a column may end
        # up with fewer than ``q`` distinct codes.
        quantile_bins = pd.qcut(df[name], q, duplicates='drop')
        df[name] = quantile_bins.cat.codes
In [4]:
def normalisasi_data(df, columns):
    """Min-max scale the given columns of ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the listed columns are overwritten with the scaled
        values.
    columns : list-like
        Labels of the columns to scale. Each column is scaled independently
        by a ``MinMaxScaler`` created with its default settings.

    Returns
    -------
    None
    """
    scaler = MinMaxScaler()
    # Fit on the frame that was passed in rather than on a notebook-level
    # variable, so the function works for any DataFrame.
    df[columns] = scaler.fit_transform(df[columns])
In [5]:
# Show every column when a frame is displayed; the raw file is wide.
pd.set_option('display.max_columns', None)

# Read the raw dataset from the working directory.
data = pd.read_csv('covid19.csv')

# Transposed preview: one line per column, one column per previewed row.
data.head().transpose()
Out[5]:
0 1 2 3 4
Date 3/1/2020 3/2/2020 3/2/2020 3/2/2020 3/3/2020
Location ISO Code ID-JK ID-JK IDN ID-RI ID-JK
Location DKI Jakarta DKI Jakarta Indonesia Riau DKI Jakarta
New Cases 2 2 2 1 2
New Deaths 0 0 0 0 0
New Recovered 0 0 0 0 0
New Active Cases 2 2 2 1 2
Total Cases 39 41 2 2 43
Total Deaths 20 20 0 0 20
Total Recovered 41 41 0 3 41
Total Active Cases -22 -20 2 -1 -18
Location Level Province Province Country Province Province
City or Regency NaN NaN NaN NaN NaN
Province DKI Jakarta DKI Jakarta NaN Riau DKI Jakarta
Country Indonesia Indonesia Indonesia Indonesia Indonesia
Continent Asia Asia Asia Asia Asia
Island Jawa Jawa NaN Sumatera Jawa
Time Zone UTC+07:00 UTC+07:00 NaN UTC+07:00 UTC+07:00
Special Status Daerah Khusus Ibu Kota Daerah Khusus Ibu Kota NaN NaN Daerah Khusus Ibu Kota
Total Regencies 1 1 416 10 1
Total Cities 5.0 5.0 98.0 2.0 5.0
Total Districts 44 44 7230 169 44
Total Urban Villages 267.0 267.0 8488.0 268.0 267.0
Total Rural Villages NaN NaN 74953.0 1591.0 NaN
Area (km2) 664 664 1916907 87024 664
Population 10846145 10846145 265185520 6074100 10846145
Population Density 16334.31 16334.31 138.34 69.8 16334.31
Longitude 106.836118 106.836118 113.921327 101.805109 106.836118
Latitude -6.204699 -6.204699 -0.789275 0.511648 -6.204699
New Cases per Million 0.18 0.18 0.01 0.16 0.18
Total Cases per Million 3.6 3.78 0.01 0.33 3.96
New Deaths per Million 0.0 0.0 0.0 0.0 0.0
Total Deaths per Million 1.84 1.84 0.0 0.0 1.84
Total Deaths per 100rb 0.18 0.18 0.0 0.0 0.18
Case Fatality Rate 51.28% 48.78% 0.00% 0.00% 46.51%
Case Recovered Rate 105.13% 100.00% 0.00% 150.00% 95.35%
Growth Factor of New Cases NaN 1.0 NaN NaN 1.0
Growth Factor of New Deaths NaN 1.0 NaN NaN 1.0
In [6]:
# Dimensions of the raw frame as (rows, columns).
data.shape
Out[6]:
(21759, 38)
In [7]:
# Number of missing values in each column of the raw frame.
data.isna().sum()
Out[7]:
Date                               0
Location ISO Code                  0
Location                           0
New Cases                          0
New Deaths                         0
New Recovered                      0
New Active Cases                   0
Total Cases                        0
Total Deaths                       0
Total Recovered                    0
Total Active Cases                 0
Location Level                     0
City or Regency                21759
Province                         642
Country                            0
Continent                          0
Island                           642
Time Zone                        642
Special Status                 18636
Total Regencies                    0
Total Cities                     614
Total Districts                    0
Total Urban Villages             617
Total Rural Villages             642
Area (km2)                         0
Population                         0
Population Density                 0
Longitude                          0
Latitude                           0
New Cases per Million              0
Total Cases per Million            0
New Deaths per Million             0
Total Deaths per Million           0
Total Deaths per 100rb             0
Case Fatality Rate                 0
Case Recovered Rate                0
Growth Factor of New Cases      1187
Growth Factor of New Deaths     2467
dtype: int64
In [8]:
# dtype pandas assigned to each column when reading the CSV.
data.dtypes
Out[8]:
Date                            object
Location ISO Code               object
Location                        object
New Cases                        int64
New Deaths                       int64
New Recovered                    int64
New Active Cases                 int64
Total Cases                      int64
Total Deaths                     int64
Total Recovered                  int64
Total Active Cases               int64
Location Level                  object
City or Regency                float64
Province                        object
Country                         object
Continent                       object
Island                          object
Time Zone                       object
Special Status                  object
Total Regencies                  int64
Total Cities                   float64
Total Districts                  int64
Total Urban Villages           float64
Total Rural Villages           float64
Area (km2)                       int64
Population                       int64
Population Density             float64
Longitude                      float64
Latitude                       float64
New Cases per Million          float64
Total Cases per Million        float64
New Deaths per Million         float64
Total Deaths per Million       float64
Total Deaths per 100rb         float64
Case Fatality Rate              object
Case Recovered Rate             object
Growth Factor of New Cases     float64
Growth Factor of New Deaths    float64
dtype: object
In [9]:
# Keep only the columns the rest of the notebook works with.
selected_columns = [
    ColumnsData.date, ColumnsData.province, ColumnsData.island,
    ColumnsData.cases, ColumnsData.deaths, ColumnsData.recovered,
    ColumnsData.actives_cases,
    ColumnsData.population, ColumnsData.area, ColumnsData.density,
]
data = data[selected_columns]
In [10]:
# Missing values per column after narrowing the frame to the selected columns.
data.isna().sum()
Out[10]:
Date                    0
Province              642
Island                642
Total Cases             0
Total Deaths            0
Total Recovered         0
Total Active Cases      0
Population              0
Area (km2)              0
Population Density      0
dtype: int64
In [11]:
# Drop every row that has at least one missing value
# (dropna works on rows with the 'any' rule by default).
data = data.dropna()
In [12]:
# Confirm that no missing values remain after dropping incomplete rows.
data.isna().sum()
Out[12]:
Date                  0
Province              0
Island                0
Total Cases           0
Total Deaths          0
Total Recovered       0
Total Active Cases    0
Population            0
Area (km2)            0
Population Density    0
dtype: int64
In [13]:
# First rows of the cleaned frame.
data.head()
Out[13]:
Date Province Island Total Cases Total Deaths Total Recovered Total Active Cases Population Area (km2) Population Density
0 3/1/2020 DKI Jakarta Jawa 39 20 41 -22 10846145 664 16334.31
1 3/2/2020 DKI Jakarta Jawa 41 20 41 -20 10846145 664 16334.31
3 3/2/2020 Riau Sumatera 2 0 3 -1 6074100 87024 69.80
4 3/3/2020 DKI Jakarta Jawa 43 20 41 -18 10846145 664 16334.31
6 3/3/2020 Jawa Barat Jawa 1 1 8 -8 45161325 35378 1276.55
In [14]:
# The raw data contains negative active-case counts (e.g. -22 for DKI Jakarta
# on 3/1/2020); clamp them to zero. The column is addressed through the shared
# ColumnsData label instead of a repeated string literal.
data[ColumnsData.actives_cases] = data[ColumnsData.actives_cases].clip(lower=0)
In [15]:
# Parse the text dates (month/day/year, e.g. '3/1/2020') and keep only the
# calendar date. `infer_datetime_format` is not passed: it is deprecated since
# pandas 2.0, where format inference is already the default behaviour.
data[ColumnsData.date] = pd.to_datetime(data[ColumnsData.date]).dt.date
data.head()
Out[15]:
Date Province Island Total Cases Total Deaths Total Recovered Total Active Cases Population Area (km2) Population Density
0 2020-03-01 DKI Jakarta Jawa 39 20 41 0 10846145 664 16334.31
1 2020-03-02 DKI Jakarta Jawa 41 20 41 0 10846145 664 16334.31
3 2020-03-02 Riau Sumatera 2 0 3 0 6074100 87024 69.80
4 2020-03-03 DKI Jakarta Jawa 43 20 41 0 10846145 664 16334.31
6 2020-03-03 Jawa Barat Jawa 1 1 8 0 45161325 35378 1276.55
In [16]:
# Deaths per case for every row: cumulative deaths divided by cumulative cases.
data[ColumnsData.mortality] = data[ColumnsData.deaths].div(data[ColumnsData.cases])
In [17]:
# Transposed preview that now includes the Mortality column.
data.head().transpose()
Out[17]:
0 1 3 4 6
Date 2020-03-01 2020-03-02 2020-03-02 2020-03-03 2020-03-03
Province DKI Jakarta DKI Jakarta Riau DKI Jakarta Jawa Barat
Island Jawa Jawa Sumatera Jawa Jawa
Total Cases 39 41 2 43 1
Total Deaths 20 20 0 20 1
Total Recovered 41 41 3 41 8
Total Active Cases 0 0 0 0 0
Population 10846145 10846145 6074100 10846145 45161325
Area (km2) 664 664 87024 664 35378
Population Density 16334.31 16334.31 69.8 16334.31 1276.55
Mortality 0.512821 0.487805 0.0 0.465116 1.0
In [19]:
# Totals per day: sum the cumulative counters over all provinces.
count_columns = [ColumnsData.cases, ColumnsData.deaths, ColumnsData.recovered]
daily_totals = (
    data[[ColumnsData.date] + count_columns]
    .groupby(ColumnsData.date)
    .sum()
    .reset_index()
)

# Keep days with at least 100 cumulative cases and reshape to long format
# (one row per date and counter), which is what the line chart expects.
reached_100 = daily_totals[ColumnsData.cases] >= 100
dfl = daily_totals[reached_100].melt(id_vars=ColumnsData.date, value_vars=count_columns)
In [20]:
# Line chart of the daily totals, one line per counter (cases, deaths, recovered).
plot_a = px.line(dfl, x=ColumnsData.date, y='value', color='variable', template="plotly_white")
# The x axis carries the dates, so it is titled with the date label.
plot_a.update_layout(title='COVID-19 in Indonesia: total number of cases over time',
                     xaxis_title=ColumnsData.date, yaxis_title='Number of cases',
                     legend=dict(x=0.02, y=0.98))
plot_a.show()
In [21]:
# Turn off pandas' chained-assignment warning (a global option, so it stays off
# for the cells that follow).
pd.set_option('mode.chained_assignment', None)
limit = 5

# Last row of every province, ordered by cumulative cases (largest first) and
# indexed by province name.
group = data.groupby(ColumnsData.province)
latest_rows = group.tail(1)
t = (
    latest_rows
    .sort_values(ColumnsData.cases, ascending=False)
    .set_index(ColumnsData.province)
    .drop(columns=[ColumnsData.date])
)

# History of the `limit` provinces with the most cases, restricted to rows
# with at least 1000 cumulative cases.
top_provinces = list(t.index[:limit])
in_top = data[ColumnsData.province].isin(top_provinces)
s = data[in_top]
s = s[s[ColumnsData.cases] >= 1000]

plot_b = px.line(
    s,
    x=ColumnsData.date,
    y=ColumnsData.cases,
    color=ColumnsData.province,
    template="plotly_white",
)
plot_b.update_layout(
    title='COVID-19 in Indonesia: total number of cases over time',
    xaxis_title=ColumnsData.date,
    yaxis_title='Number of cases',
    legend_title='<b>Top %s provinces</b>' % limit,
    legend=dict(x=0.02, y=0.98),
)
plot_b.show()
In [22]:
# Rows with at least 100 cumulative cases, ordered by date and then province.
has_100_cases = data[ColumnsData.cases] >= 100
heatmap = data[has_100_cases].sort_values([ColumnsData.date, ColumnsData.province])

# Date on x, province on y, colour encodes the cumulative case count.
cases_trace = go.Heatmap(
    x=heatmap[ColumnsData.date],
    y=heatmap[ColumnsData.province],
    z=heatmap[ColumnsData.cases],
    colorscale='Plasma',
)
vis_hmap = go.Figure(data=cases_trace)
vis_hmap.update_layout(
    title='COVID-19 in Indonesia: number of cases over time',
    xaxis_nticks=20,
)
vis_hmap.show()
In [23]:
# Correlate the numeric columns only: `t` still holds the text column Island,
# which DataFrame.corr() does not silently skip on pandas >= 2.0.
numeric_latest = t.select_dtypes(include='number')

# Correlation of every numeric column with Total Cases and Total Deaths. The two
# rows are picked by label so the result does not depend on column position.
corr = numeric_latest.corr().loc[[ColumnsData.cases, ColumnsData.deaths]].transpose()

# Keep columns whose correlation with Total Cases exceeds 0.25, strongest first.
corr = corr[corr[ColumnsData.cases] > 0.25].sort_values(ColumnsData.cases, ascending=False)
features = corr.index.tolist()

# Mortality is always part of the feature set; add it only if the correlation
# filter has not already selected it, so the column is never listed twice.
if ColumnsData.mortality not in features:
    features.append(ColumnsData.mortality)
print('Selected features:', features)

# Work on a copy so that binning/scaling later does not touch `t`.
d = t[features].copy()
d.head(10)
Selected features: ['Total Cases', 'Total Recovered', 'Total Deaths', 'Population Density', 'Population', 'Total Active Cases', 'Mortality']
Out[23]:
Total Cases Total Recovered Total Deaths Population Density Population Total Active Cases Mortality
Province
DKI Jakarta 864045 849875 13596 16334.31 10846145 574 0.015735
Jawa Barat 707934 692101 14737 1276.55 45161325 1096 0.020817
Jawa Tengah 486435 454837 30225 1108.64 36364072 1373 0.062136
Jawa Timur 399478 369537 29697 846.78 40479023 244 0.074340
Kalimantan Timur 158245 152714 5449 27.52 3552191 82 0.034434
Daerah Istimewa Yogyakarta 156769 150965 5263 1158.90 3631015 541 0.033572
Banten 132693 129872 2688 1109.64 10722374 133 0.020257
Riau 128825 124123 4109 69.80 6074100 593 0.031896
Bali 114233 110003 4046 729.43 4216171 184 0.035419
Sulawesi Selatan 109919 107630 2238 201.78 9426885 51 0.020360
In [29]:
# Replace every feature by its quantile-bin code (q=8), in place.
binned_columns = [
    ColumnsData.cases,
    ColumnsData.recovered,
    ColumnsData.density,
    ColumnsData.actives_cases,
    ColumnsData.deaths,
    ColumnsData.population,
    ColumnsData.mortality,
]
create_bin(d, binned_columns, q=8)

# Min-max scale all columns of `d`, also in place.
normalisasi_data(d, d.columns)

# Transposed preview of the first 20 provinces.
d.head(20).transpose()
Out[29]:
Province DKI Jakarta Jawa Barat Jawa Tengah Jawa Timur Kalimantan Timur Daerah Istimewa Yogyakarta Banten Riau Bali Sulawesi Selatan Sumatera Utara Sumatera Barat Kalimantan Selatan Nusa Tenggara Timur Sumatera Selatan Kepulauan Riau Kepulauan Bangka Belitung Lampung Sulawesi Tengah Kalimantan Tengah
Total Cases 1.0 1.000000 1.0 1.000000 1.000000 0.857143 0.857143 0.857143 0.857143 0.714286 0.714286 0.714286 0.714286 0.571429 0.571429 0.571429 0.571429 0.428571 0.428571 0.428571
Total Recovered 1.0 1.000000 1.0 1.000000 1.000000 0.857143 0.857143 0.857143 0.857143 0.714286 0.714286 0.714286 0.714286 0.571429 0.571429 0.571429 0.571429 0.428571 0.428571 0.428571
Total Deaths 1.0 1.000000 1.0 1.000000 1.000000 0.857143 0.714286 0.857143 0.857143 0.571429 0.714286 0.571429 0.714286 0.428571 0.714286 0.571429 0.428571 0.857143 0.428571 0.428571
Population Density 1.0 1.000000 1.0 0.857143 0.000000 1.000000 1.000000 0.285714 0.857143 0.714286 0.714286 0.571429 0.571429 0.571429 0.428571 0.714286 0.285714 0.857143 0.142857 0.000000
Population 1.0 1.000000 1.0 1.000000 0.428571 0.428571 0.857143 0.714286 0.571429 0.857143 1.000000 0.714286 0.428571 0.714286 0.857143 0.142857 0.000000 0.857143 0.285714 0.285714
Total Active Cases 1.0 1.000000 1.0 0.857143 0.428571 0.857143 0.714286 1.000000 0.714286 0.428571 0.571429 0.571429 0.285714 0.714286 0.142857 0.000000 0.571429 0.857143 0.285714 0.428571
Mortality 0.0 0.142857 1.0 1.000000 0.857143 0.714286 0.000000 0.714286 0.857143 0.142857 0.428571 0.285714 0.857143 0.142857 1.000000 0.714286 0.428571 1.000000 0.714286 0.571429
In [30]:
# Feature frame for the elbow analysis, with the columns in a fixed order.
elbow_features = ['Total Cases', 'Total Recovered', 'Population Density',
                  'Total Active Cases', 'Total Deaths', 'Population', 'Mortality']
X = d[elbow_features]
In [31]:
# Elbow method: fit K-means for k = 1..9 and record the inertia
# (within-cluster sum of squares) of each fit.
# KMeans and pyplot come from the notebook's import cell, so they are not
# imported a second time here.
cluster_counts = range(1, 10)
inertia = [
    KMeans(n_clusters=k, random_state=24).fit(X).inertia_
    for k in cluster_counts
]

# Plot on the axes object that is created, instead of creating a figure and
# then drawing through the pyplot state machine.
fig, ax = plt.subplots(figsize=(18, 16))
ax.plot(cluster_counts, inertia)
ax.set_title('The Elbow Method - Inertia plot', fontsize=20)
ax.set_xlabel('No. of Clusters')
ax.set_ylabel('WCSS')
plt.show()
C:\Users\raditya\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:881: UserWarning:

KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.

In [34]:
# Feature values as a NumPy array; columns follow the order listed here, so
# column 0 is Total Cases and column 1 is Total Recovered.
feature_columns = ['Total Cases', 'Total Recovered', 'Population Density',
                   'Total Active Cases', 'Total Deaths', 'Population', 'Mortality']
X = d[feature_columns].to_numpy()

# K-means with 5 clusters and k-means++ centroid initialisation; the fixed
# random_state makes the run repeatable.
kmeans = KMeans(init='k-means++', n_clusters=5, random_state=111)
In [39]:
y_kmeans = kmeans.fit_predict(X) # Fit the model on X and get the cluster index assigned to each row
y_kmeans
Out[39]:
array([4, 4, 1, 1, 1, 1, 4, 1, 1, 4, 4, 4, 2, 4, 2, 2, 2, 1, 2, 2, 2, 2,
       3, 2, 0, 3, 2, 0, 3, 3, 3, 3, 3, 3])
In [40]:
# Scatter of the first two columns of X, coloured by K-means cluster.
# One colour per cluster index; the loop replaces one copy-pasted scatter call
# per cluster.
cluster_colors = ['red', 'blue', 'green', 'cyan', 'black']

plt.figure(1, figsize=(12, 5))
for cluster_id, color in enumerate(cluster_colors):
    members = y_kmeans == cluster_id
    plt.scatter(X[members, 0], X[members, 1], s=50, c=color,
                label='Cluster %d' % (cluster_id + 1))

plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=200, c='purple', label='Centroids')

# Title and axis labels name what is plotted: each point is a province, and the
# axes are columns 0 and 1 of X (Total Cases and Total Recovered after
# quantile binning and min-max scaling).
plt.title('Clusters of provinces')
plt.xlabel('Total Cases (binned, scaled)')
plt.ylabel('Total Recovered (binned, scaled)')
plt.legend()
plt.show()
In [44]:
# Feature values as a NumPy array; columns follow the order listed here, so
# column 0 is Total Cases and column 1 is Total Recovered.
feature_columns = ['Total Cases', 'Total Recovered', 'Population Density',
                   'Total Active Cases', 'Total Deaths', 'Population', 'Mortality']
X = d[feature_columns].to_numpy()

# Same configuration as the 5-cluster run, now with 6 clusters: k-means++
# centroid initialisation and a fixed random_state for repeatability.
kmeans = KMeans(init='k-means++', n_clusters=6, random_state=111)
In [45]:
y_kmeans = kmeans.fit_predict(X) # Fit the model on X and get the cluster index assigned to each row
y_kmeans
Out[45]:
array([3, 3, 1, 1, 2, 1, 3, 1, 1, 5, 5, 5, 2, 5, 2, 2, 4, 1, 4, 4, 4, 2,
       0, 4, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0])
In [46]:
# Scatter of the first two columns of X, coloured by K-means cluster.
# One colour per cluster index; the loop replaces one copy-pasted scatter call
# per cluster.
cluster_colors = ['red', 'blue', 'green', 'cyan', 'black', 'pink']

plt.figure(1, figsize=(12, 5))
for cluster_id, color in enumerate(cluster_colors):
    members = y_kmeans == cluster_id
    plt.scatter(X[members, 0], X[members, 1], s=50, c=color,
                label='Cluster %d' % (cluster_id + 1))

plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=200, c='purple', label='Centroids')

# Title and axis labels name what is plotted: each point is a province, and the
# axes are columns 0 and 1 of X (Total Cases and Total Recovered after
# quantile binning and min-max scaling).
plt.title('Clusters of provinces')
plt.xlabel('Total Cases (binned, scaled)')
plt.ylabel('Total Recovered (binned, scaled)')
plt.legend()
plt.show()
In [48]:
# Final model: 6 clusters over the binned, scaled features in `d`.
# random_state is fixed (same seed as the earlier K-means cells) so the labels,
# and every chart built from them, are the same on each run.
kmeans = KMeans(n_clusters=6, random_state=111)

# Fit on the feature columns only: if this cell is run again, the label column
# added below must not be fed back in as an extra feature.
cluster_inputs = d.drop(columns=['K-means Cluster Results'], errors='ignore')
pred = kmeans.fit_predict(cluster_inputs)

# `d` was built from `t` with the same row order, so the labels can be attached
# to both frames positionally.
t['K-means Cluster Results'] = pred
d['K-means Cluster Results'] = pred

d.sort_values(['K-means Cluster Results', ColumnsData.mortality,
               ColumnsData.cases, ColumnsData.actives_cases,
               ColumnsData.density], ascending=True)
Out[48]:
Total Cases Total Recovered Total Deaths Population Density Population Total Active Cases Mortality K-means Cluster Results
Province
Daerah Istimewa Yogyakarta 0.857143 0.857143 0.857143 1.000000 0.428571 0.857143 0.714286 0
Riau 0.857143 0.857143 0.857143 0.285714 0.714286 1.000000 0.714286 0
Bali 0.857143 0.857143 0.857143 0.857143 0.571429 0.714286 0.857143 0
Lampung 0.428571 0.428571 0.857143 0.857143 0.857143 0.857143 1.000000 0
Jawa Timur 1.000000 1.000000 1.000000 0.857143 1.000000 0.857143 1.000000 0
Jawa Tengah 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0
Sulawesi Selatan 0.714286 0.714286 0.571429 0.714286 0.857143 0.428571 0.142857 1
Sumatera Barat 0.714286 0.714286 0.571429 0.571429 0.714286 0.571429 0.285714 1
Sumatera Utara 0.714286 0.714286 0.714286 0.714286 1.000000 0.571429 0.428571 1
Maluku 0.000000 0.000000 0.000000 0.142857 0.142857 0.000000 0.000000 2
Papua Barat 0.142857 0.142857 0.000000 0.000000 0.000000 0.857143 0.000000 2
Bengkulu 0.142857 0.142857 0.142857 0.428571 0.142857 0.142857 0.142857 2
Maluku Utara 0.000000 0.000000 0.000000 0.142857 0.000000 0.285714 0.285714 2
Kalimantan Utara 0.285714 0.285714 0.285714 0.000000 0.000000 0.142857 0.285714 2
Sulawesi Tenggara 0.000000 0.000000 0.142857 0.285714 0.285714 0.428571 0.428571 2
Jambi 0.142857 0.142857 0.142857 0.285714 0.428571 0.000000 0.428571 2
Sulawesi Barat 0.000000 0.000000 0.000000 0.428571 0.142857 0.000000 0.571429 2
Gorontalo 0.000000 0.000000 0.000000 0.571429 0.000000 0.000000 0.857143 2
Papua 0.285714 0.285714 0.142857 0.000000 0.571429 1.000000 0.000000 3
Nusa Tenggara Timur 0.571429 0.571429 0.428571 0.571429 0.714286 0.714286 0.142857 3
Kalimantan Barat 0.428571 0.428571 0.285714 0.142857 0.714286 0.285714 0.285714 3
Kepulauan Bangka Belitung 0.571429 0.571429 0.428571 0.285714 0.000000 0.571429 0.428571 3
Nusa Tenggara Barat 0.142857 0.142857 0.285714 0.857143 0.571429 0.571429 0.571429 3
Sulawesi Utara 0.285714 0.285714 0.285714 0.714286 0.285714 0.571429 0.571429 3
Kalimantan Tengah 0.428571 0.428571 0.428571 0.000000 0.285714 0.428571 0.571429 3
Sulawesi Tengah 0.428571 0.428571 0.428571 0.142857 0.285714 0.285714 0.714286 3
Kepulauan Riau 0.571429 0.571429 0.571429 0.714286 0.142857 0.000000 0.714286 4
Kalimantan Selatan 0.714286 0.714286 0.714286 0.571429 0.428571 0.285714 0.857143 4
Kalimantan Timur 1.000000 1.000000 1.000000 0.000000 0.428571 0.428571 0.857143 4
Aceh 0.285714 0.285714 0.571429 0.428571 0.571429 0.142857 1.000000 4
Sumatera Selatan 0.571429 0.571429 0.714286 0.428571 0.857143 0.142857 1.000000 4
Banten 0.857143 0.857143 0.714286 1.000000 0.857143 0.714286 0.000000 5
DKI Jakarta 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 5
Jawa Barat 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.142857 5
In [51]:
# Treemap nested by cluster and then province; tile size is the cumulative
# case count.
treemap_source = t.reset_index()
vis_tmap = px.treemap(
    treemap_source,
    path=['K-means Cluster Results', ColumnsData.province],
    values=ColumnsData.cases,
)
vis_tmap.update_layout(title='K-means clusters untuk kasus di setiap provinsi')
vis_tmap.show()
In [52]:
# Treemap nested by cluster and then province; tile size is the mortality
# (deaths per case).
treemap_source = t.reset_index()
vis_tmap = px.treemap(
    treemap_source,
    path=['K-means Cluster Results', ColumnsData.province],
    values=ColumnsData.mortality,
)
vis_tmap.update_layout(title='K-means clusters untuk rata rata kematian di setiap provinsi')
vis_tmap.show()
In [54]:
# Provinces ordered by cluster and then by cumulative cases (both descending).
c = t.sort_values(['K-means Cluster Results', ColumnsData.cases], ascending=False)

# One bar trace per cluster label that is present, instead of looping over a
# fixed range(0, 10) that also builds traces for cluster ids that do not exist.
# The list has its own name so the `data` DataFrame is not overwritten.
bar_traces = []
for cluster_id in sorted(c['K-means Cluster Results'].unique()):
    members = c[c['K-means Cluster Results'] == cluster_id]
    bar_traces.append(go.Bar(x=members.index, y=members[ColumnsData.cases],
                             text=members[ColumnsData.cases], name=str(cluster_id)))

vis_bar = go.Figure(data=bar_traces)
# The bars show cumulative cases, so the y axis is titled accordingly.
vis_bar.update_layout(title='K-means Clustering: kasus di setiap provinsi',
                      xaxis_title='Indonesia State', yaxis_title='Number of cases')
vis_bar.show()
In [56]:
# Mortality (deaths per case) of every province, grouped by cluster.
c = t.sort_values(['K-means Cluster Results', ColumnsData.mortality], ascending=False)

# One bar trace per cluster label that is present, instead of looping over a
# fixed range(0, 10) that also builds traces for cluster ids that do not exist.
# The list has its own name so the `data` DataFrame is not overwritten.
mortality_traces = []
for cluster_id in sorted(c['K-means Cluster Results'].unique()):
    members = c[c['K-means Cluster Results'] == cluster_id]
    mortality_traces.append(go.Bar(x=members.index, y=members[ColumnsData.mortality],
                                   text=members[ColumnsData.mortality], name=str(cluster_id)))

# Flat reference line across all provinces. The value is hard-coded, not
# computed from the data. NOTE(review): confirm 0.03 is the intended national
# average.
reference_mortality = 0.03
mortality_traces.append(
    go.Scatter(
        x=t.sort_values(ColumnsData.mortality, ascending=False).index,
        y=[reference_mortality] * len(t.index),
        marker_color='black',
        name='Indonesian avg'
    )
)

vis_bar2 = go.Figure(data=mortality_traces)
vis_bar2.update_layout(title='K-means Clustering: rata rata kematian di setiap provinsi',
                       xaxis_title='Indonesian states', yaxis_title='Deaths per case')
vis_bar2.show()
In [ ]: